import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveRegressor
# Load the Instagram post metrics from the local CSV export, decoding the
# file as latin1.
data = pd.read_csv(
    filepath_or_buffer='C:/Users/Rakesh/Datasets/Instagram.csv',
    encoding='latin1',
)
# Preview the first rows (only rendered when run as a notebook cell).
data.head()
| | Impressions | From Home | From Hashtags | From Explore | From Other | Saves | Comments | Shares | Likes | Profile Visits | Follows | Caption | Hashtags |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3920.0 | 2586.0 | 1028.0 | 619.0 | 56.0 | 98.0 | 9.0 | 5.0 | 162.0 | 35.0 | 2.0 | Here are some of the most important data visua... | #finance #money #business #investing #investme... |
| 1 | 5394.0 | 2727.0 | 1838.0 | 1174.0 | 78.0 | 194.0 | 7.0 | 14.0 | 224.0 | 48.0 | 10.0 | Here are some of the best data science project... | #healthcare #health #covid #data #datascience ... |
| 2 | 4021.0 | 2085.0 | 1188.0 | 0.0 | 533.0 | 41.0 | 11.0 | 1.0 | 131.0 | 62.0 | 12.0 | Learn how to train a machine learning model an... | #data #datascience #dataanalysis #dataanalytic... |
| 3 | 4528.0 | 2700.0 | 621.0 | 932.0 | 73.0 | 172.0 | 10.0 | 7.0 | 213.0 | 23.0 | 8.0 | Heres how you can write a Python program to d... | #python #pythonprogramming #pythonprojects #py... |
| 4 | 2518.0 | 1704.0 | 255.0 | 279.0 | 37.0 | 96.0 | 5.0 | 4.0 | 123.0 | 8.0 | 0.0 | Plotting annotations while visualizing your da... | #datavisualization #datascience #data #dataana... |
# Count the missing values in each column.
data.isna().sum()
Impressions       1
From Home         1
From Hashtags     1
From Explore      1
From Other        1
Saves             1
Comments          1
Shares            1
Likes             1
Profile Visits    1
Follows           1
Caption           1
Hashtags          1
dtype: int64
# Discard every row that contains at least one missing value, then print a
# summary of the remaining columns and their dtypes.
data = data.dropna(axis=0, how='any')
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 99 entries, 0 to 98
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Impressions     99 non-null     float64
 1   From Home       99 non-null     float64
 2   From Hashtags   99 non-null     float64
 3   From Explore    99 non-null     float64
 4   From Other      99 non-null     float64
 5   Saves           99 non-null     float64
 6   Comments        99 non-null     float64
 7   Shares          99 non-null     float64
 8   Likes           99 non-null     float64
 9   Profile Visits  99 non-null     float64
 10  Follows         99 non-null     float64
 11  Caption         99 non-null     object
 12  Hashtags        99 non-null     object
dtypes: float64(11), object(2)
memory usage: 10.8+ KB
# Distribution of impressions that came from the home feed.
# The style is applied before the figure is created so that the figure
# itself (not only the axes added afterwards) picks up 'fivethirtyeight'.
plt.style.use('fivethirtyeight')
plt.figure(figsize=(10,8))
plt.title('Distribution of Impressions From Home')
# `distplot` is deprecated; `histplot` with a KDE overlay and density
# normalisation draws the equivalent histogram-plus-curve.
sns.histplot(data['From Home'], kde=True, stat='density')
plt.show()
C:\Users\Rakesh\Downloads\Anaconda\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Distribution of impressions that came from hashtags.
plt.figure(figsize=(10, 8))
plt.title("Distribution of Impressions From Hashtags")
# `distplot` is deprecated; `histplot` with a KDE overlay and density
# normalisation draws the equivalent histogram-plus-curve.
sns.histplot(data['From Hashtags'], kde=True, stat='density')
plt.show()
C:\Users\Rakesh\Downloads\Anaconda\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Distribution of impressions that came from hashtags.
# NOTE(review): this plots the same 'From Hashtags' column and title as the
# preceding cell -- confirm whether a different source column was intended.
plt.figure(figsize=(10, 8))
plt.title("Distribution of Impressions From Hashtags")
# `distplot` is deprecated; `histplot` with a KDE overlay and density
# normalisation draws the equivalent histogram-plus-curve.
sns.histplot(data['From Hashtags'], kde=True, stat='density')
plt.show()
C:\Users\Rakesh\Downloads\Anaconda\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Distribution of impressions that came from the Explore page.
plt.figure(figsize=(10, 8))
plt.title("Distribution of Impressions From Explore")
# `distplot` is deprecated; `histplot` with a KDE overlay and density
# normalisation draws the equivalent histogram-plus-curve.
sns.histplot(data['From Explore'], kde=True, stat='density')
plt.show()
C:\Users\Rakesh\Downloads\Anaconda\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
# Total impressions contributed by each traffic source, summed over all posts.
home = data['From Home'].sum()
hashtags = data['From Hashtags'].sum()
explore = data['From Explore'].sum()
other = data['From Other'].sum()
labels = ['From Home', 'From Hashtags', 'From Explore', 'Other']
values = [home, hashtags, explore, other]
# The slices are the four aggregated totals above, passed as plain lists.
# No DataFrame is handed to `px.pie`, because none of `data`'s columns are
# referenced by the chart and its rows do not correspond to the slices.
fig = px.pie(values=values, names=labels,
             title='Impressions on Instagram Posts From Various Sources', hole=0.5)
fig.show()
# Word cloud built from all post captions joined into one string, with the
# wordcloud package's STOPWORDS filtered out.
text = " ".join(data['Caption'])
stopwords = set(STOPWORDS)
wordcloud = WordCloud(
    background_color='white',
    stopwords=stopwords,
).generate(text)

plt.style.use('classic')
plt.figure(figsize=(12,10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes; only the rendered image is of interest
plt.show()
# Word cloud built from all hashtag strings joined into one string, with the
# wordcloud package's STOPWORDS filtered out.
text = " ".join(data['Hashtags'])
stopwords = set(STOPWORDS)
wordcloud = WordCloud(
    background_color="white",
    stopwords=stopwords,
).generate(text)

plt.style.use('classic')
plt.figure(figsize=(12,10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")  # hide the axes; only the rendered image is of interest
plt.show()
# One scatter plot per engagement metric against impressions. In each plot
# the marker size follows the metric itself and an OLS trend line is drawn.
engagement_plots = (
    ('Likes', "Relationship Between Likes and Impressions"),
    ("Comments", "Relationship Between Comments and Impressions"),
    ("Shares", "Relationship Between Shares and Impressions"),
    ("Saves", "Relationship Between Saves and Impressions"),
)
for metric, plot_title in engagement_plots:
    figure = px.scatter(
        data_frame=data,
        x="Impressions",
        y=metric,
        size=metric,
        trendline="ols",
        title=plot_title,
    )
    figure.show()
# Follows gained per profile visit over all posts, expressed as a percentage.
total_follows = data['Follows'].sum()
total_profile_visits = data['Profile Visits'].sum()
conversion_rate = (total_follows / total_profile_visits) * 100
print(conversion_rate)
31.17770767613039
# Scatter of follows against profile visits, with marker size following the
# follow count and an OLS trend line.
# The title now names the plotted axes; it previously read
# "Saves and Impressions", copied from the preceding plot.
figure = px.scatter(data_frame = data, x="Profile Visits",
                    y="Follows", size="Follows", trendline="ols",
                    title = "Relationship Between Profile Visits and Followers Gained")
figure.show()
# Feature matrix (one row per post) and target vector (impressions).
x = np.array(data[['Likes','Saves','Comments','Shares', 'Profile Visits','Follows']])
y = np.array(data['Impressions'])
# Hold out 20% of the posts for evaluation; the fixed seed keeps the split
# identical between runs.
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=42)
# Seed the regressor as well: it shuffles the training data while fitting, so
# without a seed the fitted model and its score change from run to run.
model = PassiveAggressiveRegressor(random_state=42)
model.fit(xtrain, ytrain)
# Score on the held-out posts (only displayed when run as a notebook cell).
model.score(xtest, ytest)
0.9356229525007367
# A single sample to predict impressions for. Values follow the column order
# the model was trained on:
# Likes, Saves, Comments, Shares, Profile Visits, Follows.
features = np.array([282.0, 233.0, 4.0, 9.0, 165.0, 54.0]).reshape(1, -1)
model.predict(features)
array([10689.78508263])